{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "584e7e58-52e2-4562-8da4-ab458088c3f9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>STATEFP20</th>\n",
       "      <th>COUNTYFP20</th>\n",
       "      <th>TRACTCE20</th>\n",
       "      <th>GEOID20</th>\n",
       "      <th>NAME20</th>\n",
       "      <th>NAMELSAD20</th>\n",
       "      <th>MTFCC20</th>\n",
       "      <th>FUNCSTAT20</th>\n",
       "      <th>ALAND20</th>\n",
       "      <th>AWATER20</th>\n",
       "      <th>...</th>\n",
       "      <th>P0050002</th>\n",
       "      <th>P0050003</th>\n",
       "      <th>P0050004</th>\n",
       "      <th>P0050005</th>\n",
       "      <th>P0050006</th>\n",
       "      <th>P0050007</th>\n",
       "      <th>P0050008</th>\n",
       "      <th>P0050009</th>\n",
       "      <th>P0050010</th>\n",
       "      <th>geometry</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>12</td>\n",
       "      <td>011</td>\n",
       "      <td>110403</td>\n",
       "      <td>12011110403</td>\n",
       "      <td>1104.03</td>\n",
       "      <td>Census Tract</td>\n",
       "      <td>G5020</td>\n",
       "      <td>S</td>\n",
       "      <td>1323099</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>POLYGON ((-80.24758 25.99480, -80.24754 25.994...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12</td>\n",
       "      <td>011</td>\n",
       "      <td>060114</td>\n",
       "      <td>12011060114</td>\n",
       "      <td>601.14</td>\n",
       "      <td>Census Tract</td>\n",
       "      <td>G5020</td>\n",
       "      <td>S</td>\n",
       "      <td>2598912</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>16</td>\n",
       "      <td>POLYGON ((-80.26810 26.19368, -80.26702 26.193...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12</td>\n",
       "      <td>011</td>\n",
       "      <td>060120</td>\n",
       "      <td>12011060120</td>\n",
       "      <td>601.20</td>\n",
       "      <td>Census Tract</td>\n",
       "      <td>G5020</td>\n",
       "      <td>S</td>\n",
       "      <td>12814719</td>\n",
       "      <td>1823779</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>POLYGON ((-80.36670 26.12828, -80.36649 26.128...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>12</td>\n",
       "      <td>011</td>\n",
       "      <td>110347</td>\n",
       "      <td>12011110347</td>\n",
       "      <td>1103.47</td>\n",
       "      <td>Census Tract</td>\n",
       "      <td>G5020</td>\n",
       "      <td>S</td>\n",
       "      <td>2846117</td>\n",
       "      <td>545293</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>POLYGON ((-80.40957 26.03541, -80.40878 26.035...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>12</td>\n",
       "      <td>011</td>\n",
       "      <td>020421</td>\n",
       "      <td>12011020421</td>\n",
       "      <td>204.21</td>\n",
       "      <td>Census Tract</td>\n",
       "      <td>G5020</td>\n",
       "      <td>S</td>\n",
       "      <td>1060862</td>\n",
       "      <td>16632</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>POLYGON ((-80.24061 26.22083, -80.24056 26.220...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 345 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  STATEFP20 COUNTYFP20 TRACTCE20      GEOID20   NAME20    NAMELSAD20 MTFCC20  \\\n",
       "0        12        011    110403  12011110403  1104.03  Census Tract   G5020   \n",
       "1        12        011    060114  12011060114   601.14  Census Tract   G5020   \n",
       "2        12        011    060120  12011060120   601.20  Census Tract   G5020   \n",
       "3        12        011    110347  12011110347  1103.47  Census Tract   G5020   \n",
       "4        12        011    020421  12011020421   204.21  Census Tract   G5020   \n",
       "\n",
       "  FUNCSTAT20   ALAND20  AWATER20  ... P0050002 P0050003 P0050004 P0050005  \\\n",
       "0          S   1323099         0  ...        0        0        0        0   \n",
       "1          S   2598912         0  ...        0        0        0        0   \n",
       "2          S  12814719   1823779  ...        0        0        0        0   \n",
       "3          S   2846117    545293  ...        0        0        0        0   \n",
       "4          S   1060862     16632  ...        0        0        0        0   \n",
       "\n",
       "  P0050006 P0050007 P0050008 P0050009 P0050010  \\\n",
       "0        0       10        0        0       10   \n",
       "1        0       16        0        0       16   \n",
       "2        0        0        0        0        0   \n",
       "3        0        0        0        0        0   \n",
       "4        0        0        0        0        0   \n",
       "\n",
       "                                            geometry  \n",
       "0  POLYGON ((-80.24758 25.99480, -80.24754 25.994...  \n",
       "1  POLYGON ((-80.26810 26.19368, -80.26702 26.193...  \n",
       "2  POLYGON ((-80.36670 26.12828, -80.36649 26.128...  \n",
       "3  POLYGON ((-80.40957 26.03541, -80.40878 26.035...  \n",
       "4  POLYGON ((-80.24061 26.22083, -80.24056 26.220...  \n",
       "\n",
       "[5 rows x 345 columns]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this is for working with census tracts and VTD precincts\n",
    "from shapely.geometry import Point, LineString, Polygon\n",
    "import shapely\n",
    "import geopandas as gpd\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "from numpy import random\n",
    "from scipy.stats import norm\n",
    "import math\n",
    "# Load the Florida tract table (fl_pl2020_t).\n",
    "# Earlier attempt, kept for reference - it returned geometries only, so do not use it:\n",
    "#   tractGeomFile = gpd.read_file(\"state_map_files/fl_pl2020_t.shp\")\n",
    "# Original note on the .dbf read below: \"for Texas, need only this file\".\n",
    "tractTablePath = \"state_map_files/fl_pl2020_t.dbf\"\n",
    "tractPopFile = gpd.read_file(tractTablePath)\n",
    "tractPopFile.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4ce2f14-5084-4d68-a916-f58e178c6475",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: this mini-notebook exists to recover the input tract info after it was overwritten by the elPaso snip."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3e904a5b-a054-4112-a96d-2d4e2fc9e276",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "there are 5160 popn tracts for FL\n"
     ]
    }
   ],
   "source": [
    "# PULL TRACT GEOMETRIES AND POPULATION COLUMNS OUT OF THE TABLE, THEN COMPUTE TRACT AREAS\n",
    "# This works as long as the population and geometry data live in the same file.\n",
    "STATE = \"FL\"\n",
    "tractGeom = tractPopFile['geometry']\n",
    "\n",
    "# Each population column is converted to a plain numpy array straight away,\n",
    "# which avoids pandas complaining when the values are overwritten later.\n",
    "tractPop = tractPopFile['P0010001'].to_numpy()\n",
    "tractVAP = tractPopFile['P0030001'].to_numpy()    # added 3/2/22 - use VAP\n",
    "tractHisp = tractPopFile['P0040002'].to_numpy()   # added 3/2/22 - use VAP\n",
    "tractBlack = tractPopFile['P0030004'].to_numpy()  # added 3/2/22 - use VAP\n",
    "# P0020001 is deliberately not extracted: it was confirmed to match P0010001 exactly.\n",
    "\n",
    "nTracts = len(tractPop)\n",
    "print(\"there are {0} popn tracts for {1}\".format(nTracts, STATE) )\n",
    "\n",
    "# Planar area of every tract geometry, expressed in the coordinate units of the file\n",
    "# (the coordinates shown by head() look like lon/lat degrees - TODO confirm before\n",
    "# treating these values as physical areas).\n",
    "tractArea = [geom.area for geom in tractGeom]\n",
    "\n",
    "# Per-tract scratch flags, all initialised to 0, for temporary tract manipulation.\n",
    "isSkippedTract = [0] * nTracts\n",
    "\n",
    "stateVAP = np.sum(tractVAP)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ef6d042a-c5fb-4dc4-9fd7-eb9acff6ede2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Centroid coordinates of every tract.\n",
    "# Each centroid is computed once per geometry and then split into x / y lists\n",
    "# (shapely evaluates .centroid on every access, so reading it twice doubles the work).\n",
    "tractCentroids = [geom.centroid for geom in tractGeom]\n",
    "tractCPx = [c.x for c in tractCentroids]\n",
    "tractCPy = [c.y for c in tractCentroids]\n",
    "\n",
    "# One row per tract: total, Hispanic and Black counts plus the centroid position.\n",
    "# NOTE(review): tractVAP is extracted in the previous cell but is not written out here -\n",
    "# confirm that is intended.\n",
    "df = pd.DataFrame( {\"tractPop\":tractPop,\"tractHisp\":tractHisp,\"tractBlack\":tractBlack,\n",
    "                    \"centroid x\":tractCPx,\"centroid y\":tractCPy} )\n",
    "\n",
    "# Written to state_HD_output/<STATE>inputData.csv; the directory must already exist.\n",
    "# The default to_csv settings are kept, so the row index is saved as the first column.\n",
    "outname = STATE+\"inputData.csv\"\n",
    "outpath = \"state_HD_output/\"+outname\n",
    "df.to_csv(outpath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24ba6fe5-bc9d-4e8f-bb78-6b29868a8766",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
